library(SingleCellExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(scran)
## Loading required package: scuttle
library(scater)
## Loading required package: ggplot2
sce <- readRDS("../data/SC2018/231113_sceAfterDoubletRemoval.rds")
rownames(sce) <- rowData(sce)$geneSymbol
# T-cell markers
plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="CD3D")

plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="CD3E")

plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="CD3G")

plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="TRAC")

# B-cell markers
plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="MS4A1")

plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="CD19")

# NK cell markers
plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="KLRF1")

# CD14 monocytes
plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="CD14")

# CD16 monocytes
plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="FCGR3A")

# erythrocytes
plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="HBA1")

## three smaller clusters
## Dendritic cells: no longer present, it seems
## possibly removed due to doublet calling
plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="LILRA4")

plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="CLEC4C")

## MKI67+ proliferating cells: also likely no longer present
plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="MKI67")

plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="CDK1")

## megakaryocytes
plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="ITGB3")

plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="CMTM5")

set.seed(464688)
# Build a shared nearest-neighbor graph from PCA space
graph <- buildSNNGraph(sce, 
                       use.dimred = 'PCA')

# Leiden clustering on the SNN graph
cluster_leiden <- factor(igraph::cluster_leiden(graph = graph,
                                                 resolution_parameter = 0.008)$membership) 
nlevels(cluster_leiden)
## [1] 13
colData(sce)$cluster_leiden <- cluster_leiden
plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="cluster_leiden") +
  scale_color_manual(values=dayjob::paletteDiscrete(values=unique(sce$cluster_leiden), set="bear"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

ggsave("../plots/231115_umapSuperCentenarians_firstclsutering.pdf", width=12, height=12)

## annotate
cellTypeLabels <- rep(NA, length=length(cluster_leiden))
cellTypeLabels[cluster_leiden ==1] <- "NK"
cellTypeLabels[cluster_leiden ==2] <- "T-cell"
cellTypeLabels[cluster_leiden ==3] <- "CD14 Monocyte"
cellTypeLabels[cluster_leiden ==4] <- "CD16 Monocyte"
cellTypeLabels[cluster_leiden ==5] <- "T-cell"
cellTypeLabels[cluster_leiden ==6] <- "B-cell"
cellTypeLabels[cluster_leiden ==7] <- "CD14 Monocyte"
cellTypeLabels[cluster_leiden ==8] <- "T-cell"
cellTypeLabels[cluster_leiden ==9] <- "unassigned"
cellTypeLabels[cluster_leiden ==10] <- "unassigned"
cellTypeLabels[cluster_leiden ==11] <- "Megakaryocyte"
cellTypeLabels[cluster_leiden ==12] <- "Erythrocyte"
cellTypeLabels[cluster_leiden ==13] <- "unassigned" # only one cell


sce$cellType <- cellTypeLabels
sce <- sce[,!sce$cellType == "unassigned"]

plotReducedDim(sce, 
               dimred = "UMAP_fastMNNCorrected",
               point_size=1/2, 
               colour_by="cellType") +
  scale_color_manual(values=dayjob::paletteDiscrete(values=unique(sce$cellType), set="bear"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Differential abundance on cell type level

library(dayjob)
cellCountsLong <- getCellPopulationCounts(sce,
                                        patientVar = "sample",
                                        cellTypeVar = "cellType",
                                        group = "condition",
                                        format="long")
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ✔ purrr   0.3.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse()   masks IRanges::collapse()
## ✖ dplyr::combine()    masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()      masks matrixStats::count()
## ✖ dplyr::desc()       masks IRanges::desc()
## ✖ tidyr::expand()     masks S4Vectors::expand()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::first()      masks S4Vectors::first()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()     masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()     masks S4Vectors::rename()
## ✖ dplyr::slice()      masks IRanges::slice()
cellCountsWide <- getCellPopulationCounts(sce,
                                        patientVar = "sample",
                                        cellTypeVar = "cellType",
                                        group = "condition",
                                        format="wide")
countMatrix <- cellCountsWide[,-c(1:2)]
countMatrix <- t(as.matrix(countMatrix))


cellCountsLong <- cellCountsLong %>% 
  group_by(patient, group) %>%
  mutate(totalCells = sum(nCells),
         nCellTypes = n(),
         geoMean = prod(nCells+1)^(1/nCellTypes),
         CLR = log((nCells+1) / geoMean )) %>%
  ungroup() %>%
  mutate(fractionCells = nCells / totalCells)

ggplot(cellCountsLong, aes(x=celltype, y=fractionCells, fill=group)) +
  geom_boxplot(outlier.size = 0) +
  geom_point(position = position_dodge(width = .75)) +
  theme_classic()

ggplot(cellCountsLong, aes(x=celltype, y=CLR, fill=group)) +
  geom_boxplot(outlier.size = 0) +
  geom_point(position = position_dodge(width = .75)) +
  theme_classic()

voomCLR

library(limma)
## 
## Attaching package: 'limma'
## The following object is masked from 'package:scater':
## 
##     plotMDS
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA
library(voomCLR)
design <- model.matrix( ~ group, data=cellCountsWide)
v <- voomCLR(counts = countMatrix,
             design = design)
fit <- lmFit(v, design)
plotBeta(fit)

fit <- applyBiasCorrection(fit)
fit <- eBayes(fit)
tt <- topTable(fit, coef=2, n=Inf)
tt
##                     logFC    AveExpr          t      P.Value    adj.P.Val
## B-cell        -1.82591773  0.4269248 -4.7719184 9.658185e-06 0.0000676073
## Megakaryocyte -1.74369386 -3.6121741 -2.9092873 4.852084e-03 0.0169822935
## CD16 Monocyte -0.48557325 -0.5068662 -1.0101589 3.158983e-01 0.5528219425
## Erythrocyte    0.32720225 -2.0724494  0.5465086 5.864542e-01 0.6841965488
## NK             0.40359641  1.6286420  1.1838294 2.404851e-01 0.5528219425
## CD14 Monocyte -0.09978169  1.4810835 -0.3173037 7.519576e-01 0.7519576434
## T-cell        -0.13732434  2.6548395 -0.5481404 5.853393e-01 0.6841965488
##                       B
## B-cell         3.219499
## Megakaryocyte -2.266429
## CD16 Monocyte -5.831231
## Erythrocyte   -6.017585
## NK            -6.040879
## CD14 Monocyte -6.675412
## T-cell        -6.815885

NB-GLM

library(glmmTMB)
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.4.1
## Current Matrix version is 1.5.1
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
## Warning in checkDepPackageVersion(dep_pkg = "TMB"): Package version inconsistency detected.
## glmmTMB was built with TMB version 1.9.0
## Current TMB version is 1.9.1
## Please re-install glmmTMB from source or restore original 'TMB' package (see '?reinstalling' for more information)
resDfNB <- data.frame(population=rownames(countMatrix),
                      diff=rep(NA,nrow(countMatrix)),
                      se=rep(NA,nrow(countMatrix)),
                      pval=rep(NA,nrow(countMatrix)))
for(pp in 1:nrow(countMatrix)){
  curY <- countMatrix[pp,]
  m_p <- glmmTMB(curY ~ -1 + design, 
                 family=nbinom2(link="log"),
                 offset = log(colSums(countMatrix)))
  resDfNB[pp,2:4] <- c(summary(m_p)$coef$cond["designgroupSC",c(1,2,4)])
}
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended

## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended

## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended

## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended

## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended

## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
resDfNB$padj <- p.adjust(resDfNB$pval, "fdr")

resDfNB[order(abs(resDfNB$pval)),]
##      population         diff        se         pval         padj
## 1        B-cell -1.487083994 0.3376134 1.059331e-05 7.415315e-05
## 5 Megakaryocyte -1.340590157 0.5577411 1.623423e-02 5.681982e-02
## 4   Erythrocyte  1.000885355 0.6247627 1.091501e-01 2.546836e-01
## 6            NK  0.423289496 0.3529722 2.304445e-01 4.032778e-01
## 3 CD16 Monocyte -0.275485966 0.3269730 3.994889e-01 4.898243e-01
## 2 CD14 Monocyte  0.166801239 0.2067743 4.198494e-01 4.898243e-01
## 7        T-cell  0.005603286 0.1412258 9.683514e-01 9.683514e-01

edgeR

library(edgeR)
## 
## Attaching package: 'edgeR'
## The following object is masked from 'package:SingleCellExperiment':
## 
##     cpm
d <- DGEList(countMatrix)
d <- calcNormFactors(d)
d <- estimateDisp(d, design)
fit <- glmFit(d, design)
lrt <- glmLRT(fit, coef=2)
lrt$table$padj <- p.adjust(lrt$table$PValue, method="BH")
lrt$table[order(lrt$table$LR, decreasing=TRUE),]
##                     logFC   logCPM           LR       PValue         padj
## B-cell        -2.21817387 16.37263 16.034447094 6.220045e-05 0.0004354031
## Megakaryocyte -1.92182993 10.88345  5.918772905 1.498039e-02 0.0524313589
## Erythrocyte    1.46652866 13.19023  2.346807326 1.255399e-01 0.2929264022
## CD16 Monocyte -0.52063402 14.63788  0.871850660 3.504435e-01 0.6129077775
## NK             0.44714056 17.83270  0.507842587 4.760743e-01 0.6129077775
## CD14 Monocyte  0.24692029 17.30961  0.403378642 5.253495e-01 0.6129077775
## T-cell        -0.01303389 18.91406  0.002409311 9.608518e-01 0.9608517682

LinDA

library(MicrobiomeStat)
library(phyloseq)
## 
## Attaching package: 'phyloseq'
## The following object is masked from 'package:SummarizedExperiment':
## 
##     distance
## The following object is masked from 'package:Biobase':
## 
##     sampleNames
## The following object is masked from 'package:GenomicRanges':
## 
##     distance
## The following object is masked from 'package:IRanges':
## 
##     distance
lindaRes <- LinDA::linda(otu.tab = countMatrix, # rows features, cols samples
                                    meta = as.data.frame(cellCountsWide),
                                    formula = '~ group',
                                    type = 'count',
                                   adaptive=TRUE,
                                   imputation = FALSE)
## Imputation approach is used.
lindaRes$output$groupSC[order(abs(lindaRes$output$groupSC$stat), decreasing=TRUE),]
##                 baseMean log2FoldChange     lfcSE       stat       pvalue
## B-cell        150488.159    -2.64906617 0.5257893 -5.0382653 0.0005079197
## Megakaryocyte   2379.046    -2.62658353 0.7513855 -3.4956535 0.0057682458
## CD16 Monocyte  26655.514    -0.69089188 0.6666986 -1.0362882 0.3244768990
## NK            134889.885     0.59965476 0.6346155  0.9449104 0.3669753458
## T-cell        528302.252    -0.23711169 0.3320906 -0.7139969 0.4915579437
## Erythrocyte     3315.389     0.43444042 1.1260370  0.3858136 0.7077152944
## CD14 Monocyte 153969.756    -0.09222699 0.3090940 -0.2983785 0.7715221240
##                      padj reject df
## B-cell        0.003555438   TRUE 10
## Megakaryocyte 0.020188860   TRUE 10
## CD16 Monocyte 0.642206855  FALSE 10
## NK            0.642206855  FALSE 10
## T-cell        0.688181121  FALSE 10
## Erythrocyte   0.771522124  FALSE 10
## CD14 Monocyte 0.771522124  FALSE 10

Summary of results

allPopulations <- rownames(countMatrix)
nPopulations <- nrow(countMatrix)
allMethods <- c("voomCLR", "LinDA", "edgeR", "NBGLM")
nMethods <- length(allMethods)
resDfAll <- data.frame(population = rep(allPopulations, nMethods),
                       method = rep(allMethods, each=nPopulations),
                       log2FC = c(tt[allPopulations,]$logFC,
                                  lindaRes$output$groupSC[allPopulations,]$log2FoldChange,
                                  lrt$table[allPopulations,]$logFC,
                                  log2(exp(resDfNB$diff))),
                       padj = c(tt[allPopulations,]$adj.P.Val,
                                lindaRes$output$groupSC[allPopulations,]$padj,
                                lrt$table[allPopulations,]$padj,
                                resDfNB$padj)
                       
)
resDfAll$DA <- resDfAll$padj <= 0.05

pComp <- ggplot(resDfAll, 
       aes(x=population, y=method, color=log2FC, size=DA)) +
  geom_point()+
  facet_grid(~population, scales = "free_x", space = "free")+
  theme_light()+
  scale_color_gradient2(low= "blue", mid="gray", high= "red") +
  theme(axis.text.x = element_text(angle=45, hjust = 1, vjust = 1),
        strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1))
pComp
## Warning: Using size for a discrete variable is not advised.

resDfAll$DA <- resDfAll$padj <= 0.06 # to show NB models are also at adjusted p-value around .05

pComp <- ggplot(resDfAll, 
       aes(x=population, y=method, color=log2FC, size=DA)) +
  geom_point()+
  facet_grid(~population, scales = "free_x", space = "free")+
  theme_light()+
  scale_color_gradient2(low= "blue", mid="gray", high= "red") +
  theme(axis.text.x = element_text(angle=45, hjust = 1, vjust = 1),
        strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1))
pComp
## Warning: Using size for a discrete variable is not advised.